#!/bin/sh

#to generate the artificial exomes

if [ $# != 6 ]
then
     echo "Usage: sh ../AUTOMATE/Exome_smasher.csh <tempdir> <VCF_LIKE> <ORIGINALBED FILE> <TOTAL NUMBER OF READS> <START CHROMSOME> <END CHROMOSOME>"
else
	set -x
	echo `date`
	DIR=$1
	VCF_LIKE=$2
	ORIGINAL_BED=$3
	TOTAL_NUM_READS=$4
	START_CHR=$5
	END_CHR=$6
	if [ -d "$DIR" ]
	then
		echo "WARNING : $DIR directory  exists!"
	else
		mkdir -p $$DIR
		echo "$DIR created!"
	fi
    	perl /data4/bsi/RandD/Workflow/devel/genome_simulator/SVN/GenomeSmasher/trunk/bin/Genome_Smasher_Exome.pl  -refgenome_dir  /data4/bsi/RandD/Workflow/temp/Wessim/refdir -input_vcf_like $VCF_LIKE -ins_files_dir /data4/bsi/RandD/Workflow/temp/Wessim/AUTOMATE -process_dir $DIR  -fasta_length 60
	perl /data4/bsi/RandD/Workflow/temp/Wessim/AUTOMATE/perl_split_vcf_maternal_paternal.pl $DIR/file.vcf $DIR/Maternal.vcf $DIR/Paternal.vcf
	perl /data4/bsi/RandD/Workflow/devel/genome_simulator/SVN/GenomeSmasher/trunk/bin/perl_rewrite_fasta.pl $DIR/maternal_CNVchr22.fa > $DIR/final_maternal_CNVchr22.fa
	perl /data4/bsi/RandD/Workflow/devel/genome_simulator/SVN/GenomeSmasher/trunk/bin/perl_rewrite_fasta.pl $DIR/paternal_CNVchr22.fa > $DIR/final_paternal_CNVchr22.fa
	pos=`grep -v ^# $DIR/Paternal.vcf |sort -k2,2n|cut -f2|tr '\n' ','|sed -e 's/\,$//g'`
	event=`grep -v ^# $DIR/Paternal.vcf |sort -k2,2n|cut -f5|tr '\n' ','|sed -e 's/\,$//g'`
	num_event=`grep -v ^# $DIR/Paternal.vcf |sort -k2,2n|cut -f10|cut -f2 -d :|tr '\n' ','|sed -e 's/\,$//g'`
	event_length=`grep -v ^# $DIR/Paternal.vcf |sort -k2,2n|cut -f8|cut -f4 -d =|tr '\n' ','|sed -e 's/\,$//g'`
	perl /data4/bsi/RandD/Workflow/temp/Wessim/AUTOMATE/perl_modify_bed_insert_delete.pl $pos $event $num_event $event_length $ORIGINAL_BED $DIR/SureSelect_All_Exon_V4+UTRs_with_annotation_hg19_chr22_q13_2_Paternal.bed
	pos=`grep -v ^# $DIR/Maternal.vcf |sort -k2,2n|cut -f2|tr '\n' ','|sed -e 's/\,$//g'`
        event=`grep -v ^# $DIR/Maternal.vcf |sort -k2,2n|cut -f5|tr '\n' ','|sed -e 's/\,$//g'`
        num_event=`grep -v ^# $DIR/Maternal.vcf |sort -k2,2n|cut -f10|cut -f2 -d :|tr '\n' ','|sed -e 's/\,$//g'`
        event_length=`grep -v ^# $DIR/Maternal.vcf |sort -k2,2n|cut -f8|cut -f4 -d =|tr '\n' ','|sed -e 's/\,$//g'`
        perl /data4/bsi/RandD/Workflow/temp/Wessim/AUTOMATE/perl_modify_bed_insert_delete.pl $pos $event $num_event $event_length $ORIGINAL_BED $DIR/SureSelect_All_Exon_V4+UTRs_with_annotation_hg19_chr22_q13_2_Maternal.bed
	arr=`perl /data4/bsi/RandD/Workflow/temp/Wessim/AUTOMATE/perl_recalc.pl $DIR $TOTAL_NUM_READS $START_CHR $END_CHR`
	total=`echo $arr|cut -f1 -d ' '`
	total_mat=`echo $arr|cut -f2 -d ' '`
	total_pat=`echo $arr|cut -f3 -d ' '`
	cut -f1-3 $ORIGINAL_BED > $DIR/SureSelect.bed
	cut -f1-3 $DIR/SureSelect_All_Exon_V4+UTRs_with_annotation_hg19_chr22_q13_2_Paternal.bed > $DIR/SureSelect_Paternal.bed
	cut -f1-3 $DIR/SureSelect_All_Exon_V4+UTRs_with_annotation_hg19_chr22_q13_2_Maternal.bed > $DIR/SureSelect_Maternal.bed
	/usr/local/biotools/python/2.7/bin/python /data4/bsi/RandD/Workflow/temp/Wessim/Wessim/Wessim1.py -R /data4/bsi/RandD/Workflow/temp/Wessim/refdir/chr22.fa -B $DIR/SureSelect.bed -n $total -l 100 -M /data4/bsi/RandD/Workflow/temp/Wessim/GemSIM_v1.6/models/ill100v4_s.gzip -z  -o $DIR/NOCNVINDEL_CHR22_q13_2 -t 4
	/usr/local/biotools/python/2.7/bin/python /data4/bsi/RandD/Workflow/temp/Wessim/Wessim/Wessim1.py -R $DIR/final_maternal_CNVchr22.fa -B $DIR/SureSelect_Maternal.bed -n $total_mat -l 100 -M /data4/bsi/RandD/Workflow/temp/Wessim/GemSIM_v1.6/models/ill100v4_s.gzip -z  -o $DIR/CNV_INDEL_CHR22_q13_2_MAT -t 4
	/usr/local/biotools/python/2.7/bin/python /data4/bsi/RandD/Workflow/temp/Wessim/Wessim/Wessim1.py -R $DIR/final_paternal_CNVchr22.fa -B $DIR/SureSelect_Paternal.bed -n $total_pat -l 100 -M /data4/bsi/RandD/Workflow/temp/Wessim/GemSIM_v1.6/models/ill100v4_s.gzip -z  -o $DIR/CNV_INDEL_CHR22_q13_2_PAT -t 4
	zcat $DIR/CNV_INDEL_CHR22_q13_2_MAT.fastq.gz $DIR/CNV_INDEL_CHR22_q13_2_PAT.fastq.gz |gzip > $DIR/CNV_INDEL_CHR22_q13_2.fastq.gz
	
	script_path=/projects/bsi/bictools/scripts/dnaseq/GENOME_GPS/tags/1.2.1
	R1=$DIR/CNV_INDEL_CHR22_q13_2.fastq.gz
	novoalign=/projects/bsi/bictools/apps/alignment/novoalign/2.08.01/novoalign
	samtools=/projects/bsi/bictools/apps/alignment/samtools/samtools-0.1.18/
	sample=simulated
	GenomeBuild=hg19
	platform=simulated
	center=simulated
	ILL2SANGER1=`perl $script_path/checkFastqQualityScores.pl $R1 10000`
	genome_novo=/data2/bsi/reference/sequence/human/ncbi/37.1/indexed/allchr.nix
	ref=/data2/bsi/reference/sequence/human/ncbi/37.1/allchr.fa
	paramaters="-x 5  -r Random --hdrhd off -v 120 -c 4"
########################################################
######          Run novoalign for PE or SR
	if [ $ILL2SANGER1 -gt 65 ]
	then
			qual="-F ILMFQ"
	else
		qual="-F STDFQ"
	fi
	$novoalign $paramaters -d $genome_novo $qual -f $R1  -o SAM "@RG\tID:$sample\tSM:$sample\tLB:$GenomeBuild\tPL:$platform\tCN:$center"  | $samtools/samtools view -bS - > $DIR/CNV_INDEL_CHR22_q13_2.bam

	R1=$DIR/CNV_INDEL_CHR22_q13_2_PAT.fastq.gz
	ILL2SANGER1=`perl $script_path/checkFastqQualityScores.pl $R1 10000`
		if [ $ILL2SANGER1 -gt 65 ]
	then
			qual="-F ILMFQ"
	else
		qual="-F STDFQ"
	fi
	$novoalign $paramaters -d $genome_novo $qual -f $R1  -o SAM "@RG\tID:$sample\tSM:$sample\tLB:$GenomeBuild\tPL:$platform\tCN:$center"  | $samtools/samtools view -bS - > $DIR/CNV_INDEL_CHR22_q13_2_PAT.bam
	
	
	R1=$DIR/CNV_INDEL_CHR22_q13_2_MAT.fastq.gz
	ILL2SANGER1=`perl $script_path/checkFastqQualityScores.pl $R1 10000`
	if [ $ILL2SANGER1 -gt 65 ]
	then
	        qual="-F ILMFQ"
	else
	        qual="-F STDFQ"
	fi
	$novoalign $paramaters -d $genome_novo $qual -f $R1  -o SAM "@RG\tID:$sample\tSM:$sample\tLB:$GenomeBuild\tPL:$platform\tCN:$center"  | $samtools/samtools view -bS - > $DIR/CNV_INDEL_CHR22_q13_2_MAT.bam
	

	R1=$DIR/NOCNVINDEL_CHR22_q13_2.fastq.gz
	ILL2SANGER1=`perl $script_path/checkFastqQualityScores.pl $R1 10000`
        if [ $ILL2SANGER1 -gt 65 ]
        then
                qual="-F ILMFQ"
        else
                qual="-F STDFQ"
        fi
        $novoalign $paramaters -d $genome_novo $qual -f $R1  -o SAM "@RG\tID:$sample\tSM:$sample\tLB:$GenomeBuild\tPL:$platform\tCN:$center"  | $samtools/samtools view -bS - > $DIR/NO_CNV_INDEL_CHR22_q13_2.bam
	mkdir $DIR/sorted_CNV_INDEL_CHR22_q13_2_MAT
	mkdir $DIR/sorted_CNV_INDEL_CHR22_q13_2_PAT
	mkdir $DIR/sorted_CNV_INDEL_CHR22_q13_2
	mkdir $DIR/sorted_NO_CNV_INDEL_CHR22_q13_2
	sh /data2/bsi/staff_analysis/m088341/tools/misc/sortbam.sh CNV_INDEL_CHR22_q13_2_MAT.bam $DIR $DIR/sorted_CNV_INDEL_CHR22_q13_2_MAT simulated
	sh /data2/bsi/staff_analysis/m088341/tools/misc/sortbam.sh CNV_INDEL_CHR22_q13_2_PAT.bam $DIR $DIR/sorted_CNV_INDEL_CHR22_q13_2_PAT simulated
	sh /data2/bsi/staff_analysis/m088341/tools/misc/sortbam.sh CNV_INDEL_CHR22_q13_2.bam $DIR $DIR/sorted_CNV_INDEL_CHR22_q13_2 simulated
	sh /data2/bsi/staff_analysis/m088341/tools/misc/sortbam.sh NO_CNV_INDEL_CHR22_q13_2.bam $DIR $DIR/sorted_NO_CNV_INDEL_CHR22_q13_2 simulated
fi
